615d3043b35164888b1125ab4a677e1163c00938,src/edu/stanford/nlp/pipeline/ProtobufAnnotationSerializer.java,ProtobufAnnotationSerializer,fromProto,#CoreNLPProtos.Document#,669

Before Change


      CoreMap map = fromProtoNoTokens(sentence);
      if (!tokens.isEmpty() && sentence.hasTokenOffsetBegin() && sentence.hasTokenOffsetEnd()) {
        // Set tokens for sentence
        map.set(TokensAnnotation.class, tokens.subList(sentence.getTokenOffsetBegin(), sentence.getTokenOffsetEnd()));
        // Set sentence index + token index + paragraph index
        for (int i = sentence.getTokenOffsetBegin(); i < sentence.getTokenOffsetEnd(); ++i) {
          tokens.get(i).setSentIndex(sentIndex);

After Change


    Annotation ann = new Annotation(proto.getText());

    // Add tokens
    List<CoreLabel> tokens = new ArrayList<CoreLabel>();
    if (proto.getSentenceCount() > 0) {
      // Populate the tokens from the sentence
      for (CoreNLPProtos.Sentence sentence : proto.getSentenceList()) {
        for (CoreNLPProtos.Token token : sentence.getTokenList()) {
          CoreLabel coreLabel = fromProto(token);
          // Set docid
          if (proto.hasDocID()) { coreLabel.setDocID(proto.getDocID()); }
          tokens.add(coreLabel);
        }
      }
    } else if (proto.getSentencelessTokenCount() > 0) {
      // Eek -- no sentences. Try to recover tokens directly
      if (proto.getSentencelessTokenCount() > 0) {
        for (CoreNLPProtos.Token token : proto.getSentencelessTokenList()) {
          CoreLabel coreLabel = fromProto(token);
          // Set docid
          if (proto.hasDocID()) { coreLabel.setDocID(proto.getDocID()); }
          tokens.add(coreLabel);
        }
      }
    }
    if (!tokens.isEmpty()) { ann.set(TokensAnnotation.class, tokens); }

    // Add sentences
    List<CoreMap> sentences = new ArrayList<CoreMap>(proto.getSentenceCount());
    for (int sentIndex = 0; sentIndex < proto.getSentenceCount(); ++sentIndex) {
      CoreNLPProtos.Sentence sentence = proto.getSentence(sentIndex);
      CoreMap map = fromProtoNoTokens(sentence);
      if (!tokens.isEmpty() && sentence.hasTokenOffsetBegin() && sentence.hasTokenOffsetEnd()) {
        // Set tokens for sentence
        int tokenBegin = Math.min(sentence.getTokenOffsetBegin(), tokens.size());
        int tokenEnd = Math.min(sentence.getTokenOffsetEnd(), tokens.size());
        map.set(TokensAnnotation.class, tokens.subList(tokenBegin, tokenEnd));
        // Set sentence index + token index + paragraph index
        for (int i = tokenBegin; i < tokenEnd; ++i) {
          tokens.get(i).setSentIndex(sentIndex);